Effect of cohesin looping on genome architecture, from the perspective of the nuclear lamina.
Look whether LAD border class change after protein depletions, to prove the point that CTCF and cohesin are not involved in LAD border positioning. Or to which extent they are.
Use HMM models of the various experiments, and compare LAD border positioning with the LAD borders defined in the parental data.
Set the parameters and list the data.
library(GenomicRanges)## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
library(rtracklayer)
library(tidyverse)## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::collapse() masks IRanges::collapse()
## x dplyr::combine() masks BiocGenerics::combine()
## x dplyr::desc() masks IRanges::desc()
## x tidyr::expand() masks S4Vectors::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::first() masks S4Vectors::first()
## x dplyr::lag() masks stats::lag()
## x ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## x purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## x dplyr::rename() masks S4Vectors::rename()
## x dplyr::slice() masks IRanges::slice()
library(ggbeeswarm)
library(ggplot2)
library(broom)
# Prepare output
output_dir <- "ts220117_LAD_border_changes"
dir.create(output_dir, showWarnings = FALSE)
# Load input
input_dir <- "ts220113_CTCF_enrichment_at_LAD_borders"
LADs <- readRDS(file.path(input_dir, "LADs_pA.rds"))
LAD_borders <- readRDS(file.path(input_dir, "LAD_borders_pA.rds"))
input_dir <- "ts220113_effect_of_CTCF_depletion_on_LAD_borders"
bin_size <- readRDS(file.path(input_dir, "bin_size.rds"))
metadata_damid <- readRDS(file.path(input_dir, "metadata_damid.rds"))
damid <- readRDS(file.path(input_dir, "damid.rds"))
# Select input
LADs <- LADs[[1]]
LAD_borders <- LAD_borders[[1]]
LAD_borders <- LAD_borders[LAD_borders$ovl_gene == F]
bins <- damid
mcols(bins) <- NULLKnitr set-up.
library(knitr)
opts_chunk$set(fig.width = 8, fig.height = 3.5, cache = T,
dev=c('png', 'pdf'), fig.path = file.path(output_dir, "figures/"))
pdf.options(useDingbats = FALSE)Functions.
LoadBed <- function(metadata, column, reduce = F, size_filter = F) {
# Load LADs as GRangesList from metadata object
bed <- GRangesList()
for (i in 1:nrow(metadata)) {
f.name <- (metadata %>% pull(column))[i]
f.import <- import(f.name)
if (reduce) {
f.import <- GenomicRanges::reduce(f.import, min.gapwidth = 50e3)
}
if (size_filter) {
f.import <- f.import[width(f.import) > 50e3]
}
f.import$sample <- metadata$sample[i]
f.import <- f.import[seqnames(f.import) %in% c(paste0("chr", 1:22), "chrX")]
bed <- c(bed, GRangesList(f.import))
}
names(bed) <- metadata$sample
bed
}
LADBorders <- function(LADs, bins, min.distance = 0) {
# Given a (named) GRangesList of LADs, return a GRangesList with borders
# Strand information defines left (+) or right (-) border
# Also, require complete bin object to determine chromosome start and end
cells <- names(LADs)
LAD.borders <- GRangesList()
for (cell in cells ) {
# Get LADs and bins for cell
cell.LADs <- LADs[[cell]]
cell.bins <- bins
# Remove small iLADs and remove small LADs
cell.LADs <- GenomicRanges::reduce(cell.LADs, min.gapwidth = min.distance)
cell.LADs <- cell.LADs[width(cell.LADs) > min.distance]
# Get borders
cell.borders <- c(GRanges(seqnames = seqnames(cell.LADs),
ranges = IRanges(start = start(cell.LADs),
end = start(cell.LADs)),
strand = "+"),
GRanges(seqnames = seqnames(cell.LADs),
ranges = IRanges(start = end(cell.LADs),
end = end(cell.LADs)),
strand = "-"))
cell.borders <- sort(cell.borders, ignore.strand = T)
# Get start and end of the chromosome and filter overlapping borders
chromosome.ends <- c(cell.bins[! duplicated(as.character(seqnames(cell.bins)))],
rev(cell.bins)[! duplicated(as.character(seqnames(rev(cell.bins))))])
chromosome.ends <- flank(chromosome.ends, 5000, both = T)
cell.borders <- cell.borders[! overlapsAny(cell.borders,
chromosome.ends,
ignore.strand = T)]
cell.borders$cell <- cell
LAD.borders <- c(LAD.borders, GRangesList(cell.borders))
}
names(LAD.borders) <- cells
LAD.borders
}
grMid = function(gr) {
start(gr) <- end(gr) <- rowMeans(cbind(start(gr), end(gr)))
gr
}First, load LADs.
# Prepare LAD metadata
metadata_lads <- metadata_damid %>%
filter(! condition %in% c("CTCF", "CTCFNQ")) %>%
mutate(lad_file = file.path(paste0("../results_NQ/HMM/bin-", bin_size),
str_replace(file, ".norm.txt.gz", "_AD.bed.gz")))
LAD_list <- LoadBed(metadata_lads, "lad_file", reduce = T, size_filter = T)
# Prepare borders
LAD_border_list <- LADBorders(LAD_list, bins)I will start simple: determine the distance from the selected borders to the closest border.
Then, I will plot the distance to the nearest border. However, I want to determine whether the border is displaced. If the entire LAD is lost, the nearest border will be “wrong”. To prevent this, I will only look in a 100 kb window from the parental border, and disregard nearest borders that are further away than that.
In this way, I can specifically ask whether CTCF depletion (or any other depletion) results in a shift of the LAD border. Perhaps not ideal, but together with previous analyses of average signal at LAD borders, it should be a convincing analysis. I hope.
# For all samples, determine distance to nearest border
border_distance <- tibble()
for (sample in metadata_lads$sample) {
# Get borders for sample
LAD_sample <- LAD_list[[sample]]
LAD_border_sample <- LAD_border_list[[sample]]
# Distance to nearest border
dis <- as_tibble(distanceToNearest(LAD_borders, LAD_border_sample, ignore.strand = T)) %>%
rename_all(~ c("border_idx", "sample_idx", "distance"))
# Filter and add metadata
dis <- dis %>%
arrange(sample_idx, distance) %>%
filter(! duplicated(sample_idx)) %>%
mutate(sample = sample) %>%
mutate(border_strand = as.character(strand(LAD_borders))[border_idx],
sample_strand = as.character(strand(LAD_border_sample))[sample_idx],
border_within_lad = overlapsAny(LAD_borders[border_idx], LAD_sample,
ignore.strand = T),
sample_within_lad = overlapsAny(LAD_border_sample[sample_idx], LADs,
ignore.strand = T)) %>%
mutate(distance = case_when(sample_within_lad == T ~ distance,
T ~ - distance))
# Add to all border distances
border_distance <- bind_rows(border_distance,
dis)
}
# Prepare for plotting
border_distance <- border_distance %>%
separate(sample, c("condition", "timepoint"), remove = F) %>%
mutate(condition = factor(condition, levels(metadata_lads$condition)),
timepoint = factor(timepoint, levels(metadata_lads$timepoint)),
distance_kb = distance / 1e3,
distance_kb = (distance_kb %/% 10) * 10) %>%
filter(timepoint != "96h") %>%
mutate(CTCF = LAD_borders$CTCF[border_idx],
CTCF_strand = LAD_borders$CTCF_strand[border_idx],
CTCF_count = LAD_borders$CTCF.count[border_idx],
CTCF_strand = factor(CTCF_strand, c("outwards", "inwards", "ambiguous", "nonCTCF"))) %>%
mutate(CTCF_count = case_when(CTCF_count >= 3 ~ ">=3",
CTCF_count == 2 ~ "2",
CTCF_count == 1 ~ "1",
T ~ "nonCTCF"),
CTCF_count = factor(CTCF_count,
levels = c("nonCTCF", "1", "2", ">=3")))
# Plot this
border_distance %>%
ggplot(aes(x = distance_kb, col = timepoint)) +
stat_ecdf(geom = "line") +
annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
facet_grid(CTCF ~ condition) +
coord_cartesian(xlim = c(-100, 100)) +
xlab("Distance to LAD border (kb)") +
ylab("Cum density") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))border_distance %>%
filter(condition == "CTCFEL") %>%
ggplot(aes(x = distance_kb, col = timepoint)) +
stat_ecdf(geom = "line") +
annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
facet_grid(CTCF ~ condition) +
coord_cartesian(xlim = c(-100, 100)) +
xlab("Distance to LAD border (kb)") +
ylab("Cum density") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))border_distance %>%
filter(condition %in% c("RAD21", "WAPL", "CTCFWAPL")) %>%
ggplot(aes(x = distance_kb, col = timepoint)) +
stat_ecdf(geom = "line") +
annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
facet_grid(CTCF ~ condition) +
coord_cartesian(xlim = c(-100, 100)) +
xlab("Distance to LAD border (kb)") +
ylab("Cum density") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))# Plot this - assuming shift less than 100kb and removing PT
border_distance %>%
filter(condition != "PT",
distance_kb > -101, distance_kb < 101) %>%
ggplot(aes(x = distance_kb, col = timepoint)) +
stat_ecdf(geom = "line") +
annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
facet_grid(CTCF ~ condition) +
coord_cartesian(xlim = c(-100, 100)) +
xlab("Distance to LAD border (kb)") +
ylab("Cum density") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))border_distance %>%
filter(condition == "CTCFEL",
distance_kb > -101, distance_kb < 101) %>%
ggplot(aes(x = distance_kb, col = timepoint)) +
stat_ecdf(geom = "line") +
annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
facet_grid(CTCF ~ condition) +
coord_cartesian(xlim = c(-100, 100)) +
xlab("Distance to LAD border (kb)") +
ylab("Cum density") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))border_distance %>%
filter(condition %in% c("RAD21", "WAPL", "CTCFWAPL"),
distance_kb > -101, distance_kb < 101) %>%
ggplot(aes(x = distance_kb, col = timepoint)) +
stat_ecdf(geom = "line") +
annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
facet_grid(CTCF ~ condition) +
coord_cartesian(xlim = c(-100, 100)) +
xlab("Distance to LAD border (kb)") +
ylab("Cum density") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))# Plot this - assuming shift less than 100kb and removing PT, with CTCF strand information
border_distance %>%
filter(condition != "PT",
distance_kb > -101, distance_kb < 101) %>%
ggplot(aes(x = distance_kb, col = timepoint)) +
stat_ecdf(geom = "line") +
annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
facet_grid(CTCF_strand ~ condition) +
coord_cartesian(xlim = c(-100, 100)) +
xlab("Distance to LAD border (kb)") +
ylab("Cum density") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))# Plot this - assuming shift less than 100kb and removing PT, with CTCF count information
border_distance %>%
filter(condition == "CTCFEL",
distance_kb > -101, distance_kb < 101) %>%
ggplot(aes(x = distance_kb, col = timepoint)) +
stat_ecdf(geom = "line") +
annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
facet_grid(CTCF_count ~ condition) +
coord_cartesian(xlim = c(-100, 100)) +
xlab("Distance to LAD border (kb)") +
ylab("Cum density") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))border_distance %>%
filter(condition != "PT",
distance_kb > -101, distance_kb < 101) %>%
ggplot(aes(x = distance_kb, col = timepoint)) +
stat_ecdf(geom = "line") +
annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
facet_grid(CTCF_count ~ condition) +
coord_cartesian(xlim = c(-100, 100)) +
xlab("Distance to LAD border (kb)") +
ylab("Cum density") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))# Plot this - assuming shift less than 100kb and removing PT, by border type
border_distance %>%
filter(condition != "PT",
distance_kb > -101, distance_kb < 101) %>%
ggplot(aes(x = distance_kb, col = CTCF)) +
stat_ecdf(geom = "line") +
annotate("rect", xmin = 0, xmax = Inf, ymin = -Inf, ymax = Inf,
fill = "grey", alpha = 0.3) +
geom_hline(yintercept = 0, col = "black", linetype = "dashed") +
geom_vline(xintercept = 0, col = "black", linetype = "dashed") +
facet_grid(timepoint ~ condition) +
coord_cartesian(xlim = c(-100, 100)) +
xlab("Distance to LAD border (kb)") +
ylab("Cum density") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))This looks sort of as expected. Looks good. I can use these figures in the manuscript.
A different approach - use the border to determine a linear slope and compare slope / intercept.
# Get border bins
max_distance <- 3e4
bins_mid <- grMid(bins)
dis <- as_tibble(distanceToNearest(bins_mid, LAD_borders, ignore.strand = T)) %>%
rename_at(1:2, ~ c("bin_idx", "border_idx")) %>%
filter(distance < max_distance) %>%
mutate(LAD_ovl = overlapsAny(bins_mid[bin_idx], LADs),
distance = case_when(LAD_ovl == T ~ distance,
T ~ -distance),
distance = distance / 1e3,
distance = (distance %/% 10) * 10)
# Add DamID values
dis <- bind_cols(dis,
as_tibble(mcols(damid))[dis$bin_idx, ] %>%
dplyr::select(one_of(metadata_lads$sample))) %>%
arrange(border_idx, distance) %>%
dplyr::select(-bin_idx, -LAD_ovl) %>%
gather(key, value, -border_idx, -distance) %>%
drop_na()
# Linear models
border_models <- dis %>%
group_by(key, border_idx) %>%
do(border_fit = tidy(lm(value ~ distance, data = .))) %>%
unnest(border_fit)# Prepare for plotting
border_models_plot <- border_models %>%
mutate(term = str_remove_all(term, "\\(|\\)")) %>%
dplyr::select(key, border_idx, term, estimate) %>%
spread(term, estimate) %>%
separate(key, c("condition", "timepoint"), remove = F) %>%
mutate(condition = factor(condition, levels(metadata_lads$condition)),
timepoint = factor(timepoint, levels(metadata_lads$timepoint))) %>%
mutate(CTCF = LAD_borders$CTCF[border_idx]) %>%
filter(timepoint != "96h")
# Plot models
border_models_plot %>%
ggplot(aes(x = timepoint, y = distance, col = CTCF, group = interaction(timepoint, CTCF))) +
geom_quasirandom(dodge.width = 0.8) +
geom_hline(yintercept = 0, col = "grey", linetype = "dashed") +
geom_boxplot(col = "black", outlier.shape = NA, fill = NA) +
facet_grid(. ~ condition, scale = "free_x", space = "free_x") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(#aspect.ratio = 3,
axis.text.x = element_text(angle = 90, hjust = 1))## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing missing values (position_quasirandom).
## Warning: Removed 1 rows containing missing values (position_quasirandom).
## Warning: Removed 1 rows containing missing values (position_quasirandom).
## Warning: Removed 2 rows containing missing values (position_quasirandom).
border_models_plot %>%
ggplot(aes(x = timepoint, y = Intercept, col = CTCF, group = interaction(timepoint, CTCF))) +
geom_quasirandom(dodge.width = 0.8) +
geom_hline(yintercept = 0, col = "grey", linetype = "dashed") +
geom_boxplot(col = "black", outlier.shape = NA, fill = NA) +
facet_grid(. ~ condition, scale = "free_x", space = "free_x") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(#aspect.ratio = 3,
axis.text.x = element_text(angle = 90, hjust = 1))border_models_plot %>%
mutate(timepoint = factor(timepoint, rev(levels(metadata_lads$timepoint)))) %>%
gather(parameter, parameter_value, distance, Intercept) %>%
ggplot(aes(x = timepoint, y = parameter_value,
col = CTCF, group = interaction(timepoint, CTCF))) +
geom_quasirandom(dodge.width = 0.8) +
geom_hline(yintercept = 0, col = "grey", linetype = "dashed") +
geom_boxplot(col = "black", outlier.shape = NA, fill = NA) +
coord_flip() +
facet_grid(condition ~ parameter, scale = "free", space = "free_y") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(#aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing missing values (position_quasirandom).
## Warning: Removed 1 rows containing missing values (position_quasirandom).
## Warning: Removed 1 rows containing missing values (position_quasirandom).
## Warning: Removed 2 rows containing missing values (position_quasirandom).
border_models_plot %>%
mutate(CTCF = factor(CTCF, c("nonCTCF", "CTCF"))) %>%
gather(parameter, parameter_value, distance, Intercept) %>%
ggplot(aes(x = CTCF, y = parameter_value,
col = timepoint, group = interaction(CTCF, timepoint))) +
geom_quasirandom(dodge.width = 0.8) +
geom_hline(yintercept = 0, col = "grey", linetype = "dashed") +
geom_boxplot(col = "black", outlier.shape = NA, fill = NA) +
coord_flip() +
facet_grid(condition ~ parameter, scale = "free", space = "free_y") +
scale_color_brewer(palette = "Set1") +
theme_bw() +
theme(#aspect.ratio = 1,
axis.text.x = element_text(angle = 90, hjust = 1))## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 3 rows containing missing values (position_quasirandom).
## Warning: Removed 1 rows containing missing values (position_quasirandom).
## Warning: Removed 1 rows containing missing values (position_quasirandom).
## Warning: Removed 2 rows containing missing values (position_quasirandom).
# Determine significance - between border classes
border_models_plot %>%
group_by(condition, timepoint) %>%
do(test_slope = tidy(wilcox.test(distance ~ CTCF, data = .))) %>%
unnest(test_slope) %>%
mutate(padj = p.adjust(p.value),
sign = padj < 0.05)## # A tibble: 14 × 8
## condition timepoint statistic p.value method alternative padj sign
## <fct> <fct> <dbl> <dbl> <chr> <chr> <dbl> <lgl>
## 1 PT 0h 160233 4.77e-16 Wilcoxon r… two.sided 5.24e-15 TRUE
## 2 PT 24h 164092 3.07e-19 Wilcoxon r… two.sided 3.98e-18 TRUE
## 3 CTCFEL 0h 152742 6.02e-11 Wilcoxon r… two.sided 4.81e-10 TRUE
## 4 CTCFEL 6h 142146 2.57e- 5 Wilcoxon r… two.sided 5.15e- 5 TRUE
## 5 CTCFEL 24h 138983 4.46e- 4 Wilcoxon r… two.sided 4.46e- 4 TRUE
## 6 RAD21 0h 154708 3.44e-12 Wilcoxon r… two.sided 3.10e-11 TRUE
## 7 RAD21 6h 148114 5.01e- 8 Wilcoxon r… two.sided 3.01e- 7 TRUE
## 8 RAD21 24h 145961 6.45e- 7 Wilcoxon r… two.sided 3.22e- 6 TRUE
## 9 WAPL 0h 165520 1.69e-20 Wilcoxon r… two.sided 2.36e-19 TRUE
## 10 WAPL 6h 159255 2.74e-15 Wilcoxon r… two.sided 2.74e-14 TRUE
## 11 WAPL 24h 150044 3.01e- 9 Wilcoxon r… two.sided 2.11e- 8 TRUE
## 12 CTCFWAPL 0h 161698 9.83e-18 Wilcoxon r… two.sided 1.18e-16 TRUE
## 13 CTCFWAPL 6h 144143 4.71e- 6 Wilcoxon r… two.sided 1.46e- 5 TRUE
## 14 CTCFWAPL 24h 144382 3.66e- 6 Wilcoxon r… two.sided 1.46e- 5 TRUE
border_models_plot %>%
group_by(condition, timepoint) %>%
do(test_intercept = tidy(wilcox.test(Intercept ~ CTCF, data = .))) %>%
unnest(test_intercept) %>%
mutate(padj = p.adjust(p.value),
sign = padj < 0.05)## # A tibble: 14 × 8
## condition timepoint statistic p.value method alternative padj sign
## <fct> <fct> <dbl> <dbl> <chr> <chr> <dbl> <lgl>
## 1 PT 0h 124460 8.05e- 1 Wilcoxon r… two.sided 1 e+ 0 FALSE
## 2 PT 24h 137856 1.40e- 3 Wilcoxon r… two.sided 7.01e- 3 TRUE
## 3 CTCFEL 0h 100857 7.58e- 7 Wilcoxon r… two.sided 7.58e- 6 TRUE
## 4 CTCFEL 6h 121181 6.35e- 1 Wilcoxon r… two.sided 1 e+ 0 FALSE
## 5 CTCFEL 24h 122137 7.92e- 1 Wilcoxon r… two.sided 1 e+ 0 FALSE
## 6 RAD21 0h 108392 1.01e- 3 Wilcoxon r… two.sided 6.05e- 3 TRUE
## 7 RAD21 6h 106406 1.95e- 4 Wilcoxon r… two.sided 1.55e- 3 TRUE
## 8 RAD21 24h 104841 4.71e- 5 Wilcoxon r… two.sided 4.24e- 4 TRUE
## 9 WAPL 0h 92079 6.11e-12 Wilcoxon r… two.sided 7.33e-11 TRUE
## 10 WAPL 6h 70856 7.70e-31 Wilcoxon r… two.sided 1.00e-29 TRUE
## 11 WAPL 24h 67776 2.31e-34 Wilcoxon r… two.sided 3.24e-33 TRUE
## 12 CTCFWAPL 0h 99266 1.18e- 7 Wilcoxon r… two.sided 1.30e- 6 TRUE
## 13 CTCFWAPL 6h 106397 1.94e- 4 Wilcoxon r… two.sided 1.55e- 3 TRUE
## 14 CTCFWAPL 24h 111859 1.16e- 2 Wilcoxon r… two.sided 4.62e- 2 TRUE
This attempt was unsuccessful. Linear models at LAD borders are too unreliable.
Looks good. I can show that LAD border positioning is only mildly affected.
sessionInfo()## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.3 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] knitr_1.33 broom_0.7.9 ggbeeswarm_0.6.0
## [4] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7
## [7] purrr_0.3.4 readr_2.0.0 tidyr_1.1.3
## [10] tibble_3.1.6 ggplot2_3.3.5 tidyverse_1.3.1
## [13] rtracklayer_1.50.0 GenomicRanges_1.42.0 GenomeInfoDb_1.26.7
## [16] IRanges_2.24.1 S4Vectors_0.28.1 BiocGenerics_0.36.1
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-7 matrixStats_0.60.0
## [3] fs_1.5.0 lubridate_1.7.10
## [5] RColorBrewer_1.1-2 httr_1.4.2
## [7] tools_4.0.5 backports_1.2.1
## [9] bslib_0.2.5.1 utf8_1.2.2
## [11] R6_2.5.1 vipor_0.4.5
## [13] DBI_1.1.1 colorspace_2.0-2
## [15] withr_2.4.2 tidyselect_1.1.1
## [17] compiler_4.0.5 cli_3.1.0
## [19] rvest_1.0.1 Biobase_2.50.0
## [21] xml2_1.3.2 DelayedArray_0.16.3
## [23] labeling_0.4.2 sass_0.4.0
## [25] scales_1.1.1 digest_0.6.28
## [27] Rsamtools_2.6.0 rmarkdown_2.11
## [29] XVector_0.30.0 pkgconfig_2.0.3
## [31] htmltools_0.5.1.1 MatrixGenerics_1.2.1
## [33] highr_0.9 dbplyr_2.1.1
## [35] rlang_0.4.12 readxl_1.3.1
## [37] rstudioapi_0.13 farver_2.1.0
## [39] jquerylib_0.1.4 generics_0.1.0
## [41] jsonlite_1.7.2 BiocParallel_1.24.1
## [43] RCurl_1.98-1.3 magrittr_2.0.1
## [45] GenomeInfoDbData_1.2.4 Matrix_1.3-4
## [47] Rcpp_1.0.7 munsell_0.5.0
## [49] fansi_0.5.0 lifecycle_1.0.1
## [51] stringi_1.7.3 yaml_2.2.1
## [53] SummarizedExperiment_1.20.0 zlibbioc_1.36.0
## [55] grid_4.0.5 crayon_1.4.2
## [57] lattice_0.20-44 Biostrings_2.58.0
## [59] haven_2.4.1 hms_1.1.0
## [61] pillar_1.6.4 codetools_0.2-18
## [63] reprex_2.0.0 XML_3.99-0.6
## [65] glue_1.5.0 evaluate_0.14
## [67] modelr_0.1.8 vctrs_0.3.8
## [69] tzdb_0.1.2 cellranger_1.1.0
## [71] gtable_0.3.0 assertthat_0.2.1
## [73] xfun_0.24 GenomicAlignments_1.26.0
## [75] beeswarm_0.4.0 ellipsis_0.3.2